* Session setup: no -more- pauses, empty workspace, then the memory request.
set more off
clear
set mem 20m

* Load the raw Jacobson House file and discard the columns this script does
* not use (names ending in "exp", starting with "po", or starting with "switch").
use "House_1946_2006_jacobson.dta"
drop *exp po* switch*

*all vars should be coded -1 = GOP, 0 = open, 1 = Dem
 
*raw variable coding, as is
/*
3       0 = Republican incumbent
        1 = Democratic incumbent
        2 = Open seat held by Democrats
        3 = Open seat held by Republicans
        4 = Open seat, new (reapportioned)
        5 = Two incumbents (redistricting)
        9 = Open seat, held by independent (Sanders VT)

4       1 = Democrat won
        0 = Republican won
        9 = 3rd party/independent won

5       Democrat's share of two-party vote

6       Democrat's share of two-party vote, previous election

7       0 = not freshman
        1 = freshman elected previous general election
        2 = freshman elected in special election more than 1 year earlier
        3 = freshman elected in a special election during this election year
        9 = seat not defended by major party incumbent
 */
*rename vars to be consistent with 1946-2004 file used in 2006 paper
*(short raw-file names on the left, descriptive names used by the rest of this script on the right)
rename stcd statecd
rename pwin partyofwinner
rename dv demvote
rename dvp demvotelagged
rename inc incumbency
rename fr freshman


*update missing data
*a system-missing winner is recoded to 9, the codebook value for "3rd party/independent won"

replace partyofwinner = 9 if partyofwinner == .

*see Gelman and King 1994 (AJPS) for var details


*count Bernie sanders as a Dem
*
* For statecd 4501 the vote shares for 1990-2004 are overwritten with the
* values listed in the loop below (one per election, in year order).
* Each share is written twice: as demvote in its own election year and as
* demvotelagged in the election two years later, so the lagged series can
* never drift out of step with the current series.
*
* Change from the earlier hand-written list: that list stopped after setting
* demvote for 2004 and never carried the 2004 share (67) forward into
* demvotelagged for 2006. The loop fills that gap. If the file has no 2006 row
* for this seat, the extra replace simply makes zero changes.
local vt_year = 1990
foreach vt_share in 56 58 50 55 63 69 64 67 {
	replace demvote = `vt_share' if statecd == 4501 & year == `vt_year'
	replace demvotelagged = `vt_share' if statecd == 4501 & year == `vt_year' + 2
	local vt_year = `vt_year' + 2
}


*bernie sanders
* every race for this seat after 1988 is credited to the Democrats
replace partyofwinner = 1 if statecd == 4501 & year > 1988


*PAST LEGISLATIVE RESULTS

* Rescale the Democratic vote variables by 1/100 and drop the originals.
generate dvote = demvote/100
generate dlag = demvotelagged/100
drop demvote*

* Two fill-in rules for races with no recorded Democratic share (dvote == .).
* Both are keyed on the recorded winner (0 = Republican, 1 = Democrat):
*   dvoteimputed    : .25 for a Republican win, .75 for a Democratic win
*   dvoteimputed100 : 0   for a Republican win, 1   for a Democratic win
* Races with a recorded share keep it; missing-share races with any other
* winner code stay missing.

generate dvoteimputed = cond(dvote != ., dvote, ///
	cond(partyofwinner == 0, .25, cond(partyofwinner == 1, .75, .)))

generate dvoteimputed100 = cond(dvote != ., dvote, ///
	cond(partyofwinner == 0, 0, cond(partyofwinner == 1, 1, .)))


* Winner of the preceding row for the same statecd (rows ordered by year).
* Left missing on the first row of each statecd.
sort statecd year
gen partyofwinner_lag = partyofwinner[_n-1] if statecd == statecd[_n-1]

* Fill in missing lagged Democratic shares.
* Step 1: use the previous race's winner (.25 = Republican, .75 = Democrat).
gen dlagimputed = dlag
replace dlagimputed = .25 if dlag == . & partyofwinner_lag == 0
replace dlagimputed = .75 if dlag == . & partyofwinner_lag == 1

*just use 75 and 25 if missing
* Step 2 (fallback): only where step 1 could not supply a value, use the
* current race's winner instead. The test is on dlagimputed rather than dlag;
* testing dlag again would overwrite every step-1 value whenever the current
* winner is coded 0 or 1, which would make step 1 pointless.
replace dlagimputed = .25 if dlagimputed == . & partyofwinner == 0
replace dlagimputed = .75 if dlagimputed == . & partyofwinner == 1

*impute uncompetitive races to 10 and 90
* Recorded shares below .10 are set to .10 and recorded shares above .90 are
* set to .90. Everything else, including the values filled in for missing
* shares, is carried over unchanged from the *imputed variable.

generate dvoteimputed_adjusted = cond(dvote < .10 & dvote != ., .10, ///
	cond(dvote > .90 & dvote != ., .90, dvoteimputed))

generate dlagimputed_adjusted = cond(dlag < .10 & dlag != ., .10, ///
	cond(dlag > .90 & dlag != ., .90, dlagimputed))

*INCUMBENCY
* +1 = Democratic incumbent (code 1), -1 = Republican incumbent (code 0),
* 0 = every other incumbency code, including missing.

generate incumb = (incumbency == 1) - (incumbency == 0)

*UNCONTESTED
* Races with no recorded Democratic share: +1 if a Democrat won, -1 if a
* Republican won. All remaining races are 0.

generate uncontested = (dvote == . & partyofwinner == 1) - (dvote == . & partyofwinner == 0)

**PARTY CONTROL
* Party holding the seat going into the election, whether or not the
* incumbent runs: +1 for codes 1 and 2 (Democratic), -1 for codes 0 and 3
* (Republican), 0 for anything else.

generate partycontrol = inlist(incumbency, 1, 2) - inlist(incumbency, 0, 3)
drop incumbency

 *PARTY OF WINNER
rename partyofwinner winner
*drop all races where independents win
*drop if winner > 1

*FRESHMAN
* Signed freshman flag: +1 for a Democratic incumbent and -1 for a Republican
* incumbent whose freshman code lies strictly between 0 and 4 (codebook
* values 1-3, the three kinds of freshman). Everyone else gets 0.

local first_term "freshman > 0 & freshman < 4"
generate fresh = cond(incumb == 1 & (`first_term'), 1, ///
	cond(incumb == -1 & (`first_term'), -1, 0))
*drop freshman

*TAG REDISTRICTING YEARS
* 1 for the six listed election years, 0 for every other year.

generate redist_year = inlist(year, 1952, 1962, 1972, 1982, 1992, 2002)

*DROP ALL OTHER REDISTRICTING

*drop if redist == 1

*Create vars for modified uniform swing calculations

* Size of the incumbency effect: .08 from 1980 onward; before 1980 it is
* scaled linearly in year, equal to 0 at 1942 and reaching .08 at 1980.
generate incumb_effect = cond(year >= 1980, .08, .08*((year-1942)/(1980-1942)))

* Incumbency status on the preceding row of the same statecd. Where there is
* no such row the current status is used, so incumb_change is 0 there.
sort statecd year
generate incumb_lag = cond(statecd == statecd[_n-1] & incumb[_n-1] != ., incumb[_n-1], incumb)
generate incumb_change = incumb-incumb_lag

* Predicted Democratic share:
*   uncontested races -> fixed at .25 (Republican) or .75 (Democratic)
*   contested races   -> adjusted lagged share, shifted by the incumbency
*                        effect times the change in incumbency status
generate dvote_new = cond(uncontested == -1, .25, ///
	cond(uncontested == 1, .75, ///
	cond(uncontested == 0, dlagimputed_adjusted + incumb_effect*incumb_change, dlagimputed_adjusted)))

* Shrink storage types, order by election year then district, and write out.
compress
sort year statecd
save "House_1946_2006_updated.dta", replace
